package com.analysis

// Distribution of total sales amount by country



import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

/**
 * Spark job that computes the total sales amount for each country.
 *
 * For every input row a `TotalPrice` column is derived as `Quantity * UnitPrice`;
 * the values are then summed per `Country` into `TotalSalesAmount` and sorted in
 * descending order. The result is written as JSON (overwriting any previous
 * output) and a preview is printed to stdout.
 *
 * Optional positional arguments:
 *  - `args(0)`: path of the input parquet data (defaults to `output/cleaned_data.parquet`)
 *  - `args(1)`: output directory for the JSON result
 *    (defaults to `output/analysis_results/country_sales_amount`)
 */
object CountrySalesAmount {

  /** Input location used when no path is supplied on the command line. */
  private val DefaultInputPath = "output/cleaned_data.parquet"

  /** Output location used when no path is supplied on the command line. */
  private val DefaultOutputPath = "output/analysis_results/country_sales_amount"

  /**
   * Entry point: builds the session, runs the aggregation, writes and prints the result.
   *
   * @param args optional `[inputPath, outputPath]`; missing entries fall back to the defaults
   */
  def main(args: Array[String]): Unit = {
    // `lift` yields None for a missing index, so running without arguments keeps
    // the original hard-coded locations.
    val inputPath = args.lift(0).getOrElse(DefaultInputPath)
    val outputPath = args.lift(1).getOrElse(DefaultOutputPath)

    // NOTE(review): a master set in code is expected to take precedence over the
    // one passed to spark-submit; confirm before deploying this job to a cluster.
    val spark = SparkSession.builder()
      .appName("Sales Amount Distribution by Country")
      .master("local[*]")
      .getOrCreate()

    // Stop the session even when reading, aggregating or writing fails.
    try {
      val cleanedDF = spark.read.parquet(inputPath)

      // The result is consumed twice (write + show); caching the per-country
      // aggregate avoids re-running the scan, aggregation and sort.
      val resultDF = cleanedDF
        .withColumn("TotalPrice", col("Quantity") * col("UnitPrice"))
        .groupBy("Country")
        .agg(sum("TotalPrice").as("TotalSalesAmount"))
        .orderBy(desc("TotalSalesAmount"))
        .cache()

      resultDF.write
        .mode("overwrite")
        .json(outputPath)

      resultDF.show()
    } finally {
      spark.stop()
    }
  }
}